library(rio)
library(forecast)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(tseries)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.4 ✓ purrr 0.3.4
## ✓ tibble 3.1.2 ✓ dplyr 1.0.7
## ✓ tidyr 1.1.3 ✓ stringr 1.4.0
## ✓ readr 1.4.0 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(ggplot2)
library(ggfortify)
## Registered S3 methods overwritten by 'ggfortify':
## method from
## autoplot.Arima forecast
## autoplot.acf forecast
## autoplot.ar forecast
## autoplot.bats forecast
## autoplot.decomposed.ts forecast
## autoplot.ets forecast
## autoplot.forecast forecast
## autoplot.stl forecast
## autoplot.ts forecast
## fitted.ar forecast
## fortify.ts forecast
## residuals.ar forecast
library(zoo)
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
dataset <- read.csv(file= "owid-covid-data.csv", stringsAsFactors =FALSE, header=TRUE)
dim(dataset)
## [1] 100191 60
head(dataset)
tail(dataset)
summary(dataset)
## iso_code continent location date
## Length:100191 Length:100191 Length:100191 Length:100191
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## total_cases new_cases new_cases_smoothed total_deaths
## Min. : 1 Min. :-74347 Min. : -6223.0 Min. : 1
## 1st Qu.: 1285 1st Qu.: 2 1st Qu.: 7.6 1st Qu.: 54
## Median : 13634 Median : 74 Median : 92.3 Median : 399
## Mean : 1075182 Mean : 6037 Mean : 6064.2 Mean : 29024
## 3rd Qu.: 145379 3rd Qu.: 809 3rd Qu.: 852.1 3rd Qu.: 3846
## Max. :183781831 Max. :906017 Max. :826388.4 Max. :3977058
## NA's :3607 NA's :3610 NA's :4620 NA's :13760
## new_deaths new_deaths_smoothed total_cases_per_million
## Min. :-1918.0 Min. : -232.143 Min. : 0.0
## 1st Qu.: 0.0 1st Qu.: 0.000 1st Qu.: 262.7
## Median : 2.0 Median : 1.429 Median : 1820.3
## Mean : 146.3 Mean : 131.873 Mean : 13250.9
## 3rd Qu.: 18.0 3rd Qu.: 14.500 3rd Qu.: 13885.6
## Max. :18050.0 Max. :14737.000 Max. :180133.3
## NA's :13604 NA's :4620 NA's :4121
## new_cases_per_million new_cases_smoothed_per_million total_deaths_per_million
## Min. :-2153.437 Min. :-276.825 Min. : 0.001
## 1st Qu.: 0.212 1st Qu.: 1.295 1st Qu.: 8.050
## Median : 8.453 Median : 11.193 Median : 52.015
## Mean : 76.035 Mean : 76.368 Mean : 292.531
## 3rd Qu.: 70.061 3rd Qu.: 78.577 3rd Qu.: 319.336
## Max. :18293.675 Max. :4083.500 Max. :5860.454
## NA's :4124 NA's :5129 NA's :14261
## new_deaths_per_million new_deaths_smoothed_per_million reproduction_rate
## Min. :-76.445 Min. :-10.921 Min. :-0.020
## 1st Qu.: 0.000 1st Qu.: 0.000 1st Qu.: 0.840
## Median : 0.134 Median : 0.158 Median : 1.010
## Mean : 1.551 Mean : 1.397 Mean : 1.003
## 3rd Qu.: 1.322 3rd Qu.: 1.283 3rd Qu.: 1.170
## Max. :218.329 Max. : 63.140 Max. : 5.770
## NA's :14105 NA's :5129 NA's :19444
## icu_patients icu_patients_per_million hosp_patients
## Min. : 0.0 Min. : 0.00 Min. : 0
## 1st Qu.: 32.0 1st Qu.: 4.79 1st Qu.: 118
## Median : 182.0 Median : 16.87 Median : 650
## Mean : 1055.7 Mean : 26.16 Mean : 4589
## 3rd Qu.: 717.5 3rd Qu.: 40.44 3rd Qu.: 2707
## Max. :28889.0 Max. :192.74 Max. :133214
## NA's :90081 NA's :90081 NA's :87630
## hosp_patients_per_million weekly_icu_admissions
## Min. : 0.00 Min. : 0.00
## 1st Qu.: 23.44 1st Qu.: 10.75
## Median : 79.93 Median : 51.24
## Mean : 167.67 Mean : 272.98
## 3rd Qu.: 243.32 3rd Qu.: 220.33
## Max. :1532.57 Max. :4002.46
## NA's :87630 NA's :99307
## weekly_icu_admissions_per_million weekly_hosp_admissions
## Min. : 0.00 Min. : 0
## 1st Qu.: 1.68 1st Qu.: 50
## Median : 8.37 Median : 320
## Mean : 20.49 Mean : 3515
## 3rd Qu.: 22.68 3rd Qu.: 1694
## Max. :279.41 Max. :116323
## NA's :99307 NA's :98622
## weekly_hosp_admissions_per_million new_tests total_tests
## Min. : 0.00 Min. :-239172 Min. : 0
## 1st Qu.: 9.35 1st Qu.: 1658 1st Qu.: 162492
## Median : 40.56 Median : 6201 Median : 831234
## Mean : 107.90 Mean : 48049 Mean : 7823804
## 3rd Qu.: 123.39 3rd Qu.: 23961 3rd Qu.: 3476125
## Max. :2656.91 Max. :3740296 Max. :468269982
## NA's :98622 NA's :55294 NA's :55620
## total_tests_per_thousand new_tests_per_thousand new_tests_smoothed
## Min. : 0.00 Min. :-23.01 Min. : 0
## 1st Qu.: 14.39 1st Qu.: 0.14 1st Qu.: 1741
## Median : 72.44 Median : 0.63 Median : 6576
## Mean : 313.82 Mean : 2.13 Mean : 45621
## 3rd Qu.: 297.46 3rd Qu.: 1.96 3rd Qu.: 26470
## Max. :9257.57 Max. :327.09 Max. :3080396
## NA's :55620 NA's :55294 NA's :47968
## new_tests_smoothed_per_thousand positive_rate tests_per_case
## Min. : 0.00 Min. :0.00 Min. : 1.1
## 1st Qu.: 0.14 1st Qu.:0.02 1st Qu.: 7.7
## Median : 0.65 Median :0.05 Median : 18.5
## Mean : 2.04 Mean :0.09 Mean : 163.5
## 3rd Qu.: 2.01 3rd Qu.:0.13 3rd Qu.: 57.1
## Max. :90.85 Max. :0.93 Max. :50000.0
## NA's :47968 NA's :51439 NA's :52037
## tests_units total_vaccinations people_vaccinated
## Length:100191 Min. :0.000e+00 Min. :0.000e+00
## Class :character 1st Qu.:1.248e+05 1st Qu.:9.474e+04
## Mode :character Median :9.494e+05 Median :6.649e+05
## Mean :3.251e+07 Mean :1.807e+07
## 3rd Qu.:5.833e+06 3rd Qu.:3.913e+06
## Max. :3.221e+09 Max. :1.889e+09
## NA's :83301 NA's :84124
## people_fully_vaccinated new_vaccinations new_vaccinations_smoothed
## Min. : 1 Min. : 0 Min. : 0
## 1st Qu.: 51215 1st Qu.: 4562 1st Qu.: 879
## Median : 402035 Median : 26024 Median : 7075
## Mean : 10041995 Mean : 687973 Mean : 333569
## 3rd Qu.: 2313227 3rd Qu.: 142223 3rd Qu.: 44588
## Max. :884442915 Max. :47776982 Max. :43047167
## NA's :86962 NA's :86099 NA's :71014
## total_vaccinations_per_hundred people_vaccinated_per_hundred
## Min. : 0.00 Min. : 0.00
## 1st Qu.: 2.33 1st Qu.: 1.95
## Median : 11.55 Median : 8.45
## Mean : 24.57 Mean : 16.07
## 3rd Qu.: 35.09 3rd Qu.: 24.24
## Max. :231.89 Max. :116.53
## NA's :83301 NA's :84124
## people_fully_vaccinated_per_hundred new_vaccinations_smoothed_per_million
## Min. : 0.00 Min. : 0
## 1st Qu.: 0.96 1st Qu.: 376
## Median : 4.33 Median : 1713
## Mean : 10.12 Mean : 3253
## 3rd Qu.: 13.20 3rd Qu.: 4656
## Max. :115.36 Max. :118759
## NA's :86962 NA's :71014
## stringency_index population population_density median_age
## Min. : 0.00 Min. :4.700e+01 Min. : 0.137 Min. :15.10
## 1st Qu.: 44.44 1st Qu.:2.226e+06 1st Qu.: 36.253 1st Qu.:22.20
## Median : 60.19 Median :9.890e+06 Median : 83.479 Median :29.90
## Mean : 58.26 Mean :1.247e+08 Mean : 386.855 Mean :30.56
## 3rd Qu.: 74.54 3rd Qu.:3.481e+07 3rd Qu.: 209.588 3rd Qu.:39.10
## Max. :100.00 Max. :7.795e+09 Max. :20546.766 Max. :48.20
## NA's :16095 NA's :648 NA's :7096 NA's :10655
## aged_65_older aged_70_older gdp_per_capita extreme_poverty
## Min. : 1.144 Min. : 0.526 Min. : 661.2 Min. : 0.10
## 1st Qu.: 3.466 1st Qu.: 2.043 1st Qu.: 4466.5 1st Qu.: 0.60
## Median : 6.378 Median : 3.871 Median : 12951.8 Median : 2.20
## Mean : 8.789 Mean : 5.564 Mean : 19283.0 Mean :13.41
## 3rd Qu.:14.312 3rd Qu.: 9.167 3rd Qu.: 27216.4 3rd Qu.:21.20
## Max. :27.049 Max. :18.493 Max. :116935.6 Max. :77.60
## NA's :11661 NA's :11150 NA's :10309 NA's :39527
## cardiovasc_death_rate diabetes_prevalence female_smokers male_smokers
## Min. : 79.37 Min. : 0.990 Min. : 0.10 Min. : 7.70
## 1st Qu.:167.29 1st Qu.: 5.310 1st Qu.: 1.90 1st Qu.:21.60
## Median :242.65 Median : 7.110 Median : 6.30 Median :31.40
## Mean :258.52 Mean : 7.928 Mean :10.57 Mean :32.69
## 3rd Qu.:329.63 3rd Qu.:10.080 3rd Qu.:19.30 3rd Qu.:41.10
## Max. :724.42 Max. :30.530 Max. :44.00 Max. :78.10
## NA's :10283 NA's :7971 NA's :29873 NA's :30900
## handwashing_facilities hospital_beds_per_thousand life_expectancy
## Min. : 1.19 Min. : 0.100 Min. :53.28
## 1st Qu.: 19.35 1st Qu.: 1.300 1st Qu.:67.92
## Median : 49.84 Median : 2.400 Median :74.62
## Mean : 50.83 Mean : 3.028 Mean :73.24
## 3rd Qu.: 83.24 3rd Qu.: 3.861 3rd Qu.:78.74
## Max. :100.00 Max. :13.800 Max. :86.75
## NA's :55008 NA's :18293 NA's :5051
## human_development_index excess_mortality
## Min. :0.394 Min. :-95.59
## 1st Qu.:0.602 1st Qu.: 0.41
## Median :0.748 Median : 7.34
## Mean :0.727 Mean : 18.24
## 3rd Qu.:0.848 3rd Qu.: 23.86
## Max. :0.957 Max. :409.69
## NA's :10139 NA's :96688
str(dataset)
## 'data.frame': 100191 obs. of 60 variables:
## $ iso_code : chr "AFG" "AFG" "AFG" "AFG" ...
## $ continent : chr "Asia" "Asia" "Asia" "Asia" ...
## $ location : chr "Afghanistan" "Afghanistan" "Afghanistan" "Afghanistan" ...
## $ date : chr "2020-02-24" "2020-02-25" "2020-02-26" "2020-02-27" ...
## $ total_cases : num 1 1 1 1 1 1 1 1 2 4 ...
## $ new_cases : num 1 0 0 0 0 0 0 0 1 2 ...
## $ new_cases_smoothed : num NA NA NA NA NA 0.143 0.143 0 0.143 0.429 ...
## $ total_deaths : num NA NA NA NA NA NA NA NA NA NA ...
## $ new_deaths : num NA NA NA NA NA NA NA NA NA NA ...
## $ new_deaths_smoothed : num NA NA NA NA NA 0 0 0 0 0 ...
## $ total_cases_per_million : num 0.026 0.026 0.026 0.026 0.026 0.026 0.026 0.026 0.051 0.103 ...
## $ new_cases_per_million : num 0.026 0 0 0 0 0 0 0 0.026 0.051 ...
## $ new_cases_smoothed_per_million : num NA NA NA NA NA 0.004 0.004 0 0.004 0.011 ...
## $ total_deaths_per_million : num NA NA NA NA NA NA NA NA NA NA ...
## $ new_deaths_per_million : num NA NA NA NA NA NA NA NA NA NA ...
## $ new_deaths_smoothed_per_million : num NA NA NA NA NA 0 0 0 0 0 ...
## $ reproduction_rate : num NA NA NA NA NA NA NA NA NA NA ...
## $ icu_patients : num NA NA NA NA NA NA NA NA NA NA ...
## $ icu_patients_per_million : num NA NA NA NA NA NA NA NA NA NA ...
## $ hosp_patients : num NA NA NA NA NA NA NA NA NA NA ...
## $ hosp_patients_per_million : num NA NA NA NA NA NA NA NA NA NA ...
## $ weekly_icu_admissions : num NA NA NA NA NA NA NA NA NA NA ...
## $ weekly_icu_admissions_per_million : num NA NA NA NA NA NA NA NA NA NA ...
## $ weekly_hosp_admissions : num NA NA NA NA NA NA NA NA NA NA ...
## $ weekly_hosp_admissions_per_million : num NA NA NA NA NA NA NA NA NA NA ...
## $ new_tests : num NA NA NA NA NA NA NA NA NA NA ...
## $ total_tests : num NA NA NA NA NA NA NA NA NA NA ...
## $ total_tests_per_thousand : num NA NA NA NA NA NA NA NA NA NA ...
## $ new_tests_per_thousand : num NA NA NA NA NA NA NA NA NA NA ...
## $ new_tests_smoothed : num NA NA NA NA NA NA NA NA NA NA ...
## $ new_tests_smoothed_per_thousand : num NA NA NA NA NA NA NA NA NA NA ...
## $ positive_rate : num NA NA NA NA NA NA NA NA NA NA ...
## $ tests_per_case : num NA NA NA NA NA NA NA NA NA NA ...
## $ tests_units : chr "" "" "" "" ...
## $ total_vaccinations : num NA NA NA NA NA NA NA NA NA NA ...
## $ people_vaccinated : num NA NA NA NA NA NA NA NA NA NA ...
## $ people_fully_vaccinated : num NA NA NA NA NA NA NA NA NA NA ...
## $ new_vaccinations : num NA NA NA NA NA NA NA NA NA NA ...
## $ new_vaccinations_smoothed : num NA NA NA NA NA NA NA NA NA NA ...
## $ total_vaccinations_per_hundred : num NA NA NA NA NA NA NA NA NA NA ...
## $ people_vaccinated_per_hundred : num NA NA NA NA NA NA NA NA NA NA ...
## $ people_fully_vaccinated_per_hundred : num NA NA NA NA NA NA NA NA NA NA ...
## $ new_vaccinations_smoothed_per_million: num NA NA NA NA NA NA NA NA NA NA ...
## $ stringency_index : num 8.33 8.33 8.33 8.33 8.33 ...
## $ population : num 38928341 38928341 38928341 38928341 38928341 ...
## $ population_density : num 54.4 54.4 54.4 54.4 54.4 ...
## $ median_age : num 18.6 18.6 18.6 18.6 18.6 18.6 18.6 18.6 18.6 18.6 ...
## $ aged_65_older : num 2.58 2.58 2.58 2.58 2.58 ...
## $ aged_70_older : num 1.34 1.34 1.34 1.34 1.34 ...
## $ gdp_per_capita : num 1804 1804 1804 1804 1804 ...
## $ extreme_poverty : num NA NA NA NA NA NA NA NA NA NA ...
## $ cardiovasc_death_rate : num 597 597 597 597 597 ...
## $ diabetes_prevalence : num 9.59 9.59 9.59 9.59 9.59 9.59 9.59 9.59 9.59 9.59 ...
## $ female_smokers : num NA NA NA NA NA NA NA NA NA NA ...
## $ male_smokers : num NA NA NA NA NA NA NA NA NA NA ...
## $ handwashing_facilities : num 37.7 37.7 37.7 37.7 37.7 ...
## $ hospital_beds_per_thousand : num 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 ...
## $ life_expectancy : num 64.8 64.8 64.8 64.8 64.8 ...
## $ human_development_index : num 0.511 0.511 0.511 0.511 0.511 0.511 0.511 0.511 0.511 0.511 ...
## $ excess_mortality : num NA NA NA NA NA NA NA NA NA NA ...
sapply(dataset, class)
## iso_code continent
## "character" "character"
## location date
## "character" "character"
## total_cases new_cases
## "numeric" "numeric"
## new_cases_smoothed total_deaths
## "numeric" "numeric"
## new_deaths new_deaths_smoothed
## "numeric" "numeric"
## total_cases_per_million new_cases_per_million
## "numeric" "numeric"
## new_cases_smoothed_per_million total_deaths_per_million
## "numeric" "numeric"
## new_deaths_per_million new_deaths_smoothed_per_million
## "numeric" "numeric"
## reproduction_rate icu_patients
## "numeric" "numeric"
## icu_patients_per_million hosp_patients
## "numeric" "numeric"
## hosp_patients_per_million weekly_icu_admissions
## "numeric" "numeric"
## weekly_icu_admissions_per_million weekly_hosp_admissions
## "numeric" "numeric"
## weekly_hosp_admissions_per_million new_tests
## "numeric" "numeric"
## total_tests total_tests_per_thousand
## "numeric" "numeric"
## new_tests_per_thousand new_tests_smoothed
## "numeric" "numeric"
## new_tests_smoothed_per_thousand positive_rate
## "numeric" "numeric"
## tests_per_case tests_units
## "numeric" "character"
## total_vaccinations people_vaccinated
## "numeric" "numeric"
## people_fully_vaccinated new_vaccinations
## "numeric" "numeric"
## new_vaccinations_smoothed total_vaccinations_per_hundred
## "numeric" "numeric"
## people_vaccinated_per_hundred people_fully_vaccinated_per_hundred
## "numeric" "numeric"
## new_vaccinations_smoothed_per_million stringency_index
## "numeric" "numeric"
## population population_density
## "numeric" "numeric"
## median_age aged_65_older
## "numeric" "numeric"
## aged_70_older gdp_per_capita
## "numeric" "numeric"
## extreme_poverty cardiovasc_death_rate
## "numeric" "numeric"
## diabetes_prevalence female_smokers
## "numeric" "numeric"
## male_smokers handwashing_facilities
## "numeric" "numeric"
## hospital_beds_per_thousand life_expectancy
## "numeric" "numeric"
## human_development_index excess_mortality
## "numeric" "numeric"
#The class of Date is character, but the class should be date.
dataset$date <- as.Date(dataset$date)
#Creating a subset with only four columns that we need.
dataSubset <- data.frame("date" = dataset$date, "location" = dataset$location, "new_cases" = dataset$new_cases, "new_deaths" = dataset$new_deaths)
dataSubset
#The period of the study is from March 1st, 2020 to June 1st, 2021.
China<-dataSubset %>% filter (location =='China', date>= '2020-03-01',date<='2021-06-01')
US<-dataSubset %>% filter (location =='United States', date>= '2020-03-01',date<='2021-06-01')
India<-dataSubset %>% filter (location =='India', date>= '2020-03-01',date<='2021-06-01')
UK<-dataSubset %>% filter (location =='United Kingdom', date>= '2020-03-01',date<='2021-06-01')
Germany<-dataSubset %>% filter (location =='Germany', date>= '2020-03-01',date<='2021-06-01')
Belgium<-dataSubset %>% filter (location =='Belgium', date>= '2020-03-01',date<='2021-06-01')
Netherlands<-dataSubset %>% filter (location =='Netherlands', date>= '2020-03-01',date<='2021-06-01')
#There are missing values, in the new deaths variable, before first death cases occur, so we are replacing NA to 0.
India[is.na(India)] <-0
UK[is.na(UK)] <-0
Germany[is.na(Germany)] <-0
Belgium[is.na(Belgium)] <-0
Netherlands[is.na(Netherlands)] <-0
#Combine Germany Belgium and Netherlands as EU
EUCounts <- Germany[3:4] + Belgium[3:4] + Netherlands[3:4]
EUDates <- Germany[1]
EULocation<- matrix(rep('EU',458),458,1)
colnames(EULocation) <-c('location')
EU <- cbind(EUDates,EULocation,EUCounts)
cor(US$new_cases,US$new_deaths)
## [1] 0.7066979
cor(China$new_cases,China$new_deaths)
## [1] 0.4119176
cor(India$new_cases, India$new_deaths)
## [1] 0.9129279
cor(UK$new_cases, UK$new_deaths)
## [1] 0.5987431
cor(EU$new_cases, EU$new_deaths)
## [1] 0.5436605
#New cases and new deaths have a positive correlation in all countries. Since new cases reach zero, new deaths will also be zero. In this study we will focus on forecasting new cases only.
#Negative values are observed in new cases for China and UK. The negative values are false positives due to negative confirmatory PCR.
summary(US)
## date location new_cases new_deaths
## Min. :2020-03-01 Length:458 Min. : 7 Min. : 0.0
## 1st Qu.:2020-06-23 Class :character 1st Qu.: 29507 1st Qu.: 654.2
## Median :2020-10-15 Mode :character Median : 51334 Median :1014.0
## Mean :2020-10-15 Mean : 72687 Mean :1299.4
## 3rd Qu.:2021-02-06 3rd Qu.: 79390 3rd Qu.:1721.2
## Max. :2021-06-01 Max. :300462 Max. :4476.0
summary(China)
## date location new_cases new_deaths
## Min. :2020-03-01 Length:458 Min. : -1.00 Min. : 0.000
## 1st Qu.:2020-06-23 Class :character 1st Qu.: 9.00 1st Qu.: 0.000
## Median :2020-10-15 Mode :character Median : 15.00 Median : 0.000
## Mean :2020-10-15 Mean : 26.06 Mean : 3.932
## 3rd Qu.:2021-02-06 3rd Qu.: 25.00 3rd Qu.: 0.000
## Max. :2021-06-01 Max. :575.00 Max. :1290.000
summary(India)
## date location new_cases new_deaths
## Min. :2020-03-01 Length:458 Min. : 0 Min. : -1.0
## 1st Qu.:2020-06-23 Class :character 1st Qu.: 11046 1st Qu.: 121.5
## Median :2020-10-15 Mode :character Median : 29268 Median : 415.5
## Mean :2020-10-15 Mean : 61808 Mean : 731.7
## 3rd Qu.:2021-02-06 3rd Qu.: 66932 3rd Qu.: 878.2
## Max. :2021-06-01 Max. :414188 Max. :4529.0
summary(UK)
## date location new_cases new_deaths
## Min. :2020-03-01 Length:458 Min. :-4787 Min. : 0.0
## 1st Qu.:2020-06-23 Class :character 1st Qu.: 1422 1st Qu.: 18.0
## Median :2020-10-15 Mode :character Median : 3726 Median : 101.0
## Mean :2020-10-15 Mean : 9839 Mean : 279.6
## 3rd Qu.:2021-02-06 3rd Qu.:14918 3rd Qu.: 445.8
## Max. :2021-06-01 Max. :68192 Max. :1826.0
summary(EU)
## date location new_cases new_deaths
## Min. :2020-03-01 Length:458 Min. : 43 Min. :-105.00
## 1st Qu.:2020-06-23 Class :character 1st Qu.: 1956 1st Qu.: 27.25
## Median :2020-10-15 Mode :character Median : 9395 Median : 181.00
## Mean :2020-10-15 Mean :14051 Mean : 287.46
## 3rd Qu.:2021-02-06 3rd Qu.:22378 3rd Qu.: 436.00
## Max. :2021-06-01 Max. :61408 Max. :1923.00
#China has a false positive case, so we will change the new case value on 2020-06-03 from -1 to 0, and reduce the new case value on 2020-06-02 from 1 to 0.
China[95,3] <- 0
China[94,3] <- 0
summary(China)
## date location new_cases new_deaths
## Min. :2020-03-01 Length:458 Min. : 0.00 Min. : 0.000
## 1st Qu.:2020-06-23 Class :character 1st Qu.: 9.00 1st Qu.: 0.000
## Median :2020-10-15 Mode :character Median : 15.00 Median : 0.000
## Mean :2020-10-15 Mean : 26.06 Mean : 3.932
## 3rd Qu.:2021-02-06 3rd Qu.: 25.00 3rd Qu.: 0.000
## Max. :2021-06-01 Max. :575.00 Max. :1290.000
#UK has thousands of false positive cases, from the beginning, and updated the data by reducing 8,010 cases on April 4, 2021.
#In order to have an accurate data, we will import the data released by UK government. (https://coronavirus.data.gov.uk/details/cases)
UK_data <- read.csv(file= "UK_data.csv", stringsAsFactors =FALSE, header=TRUE)
Subset_UK <- data.frame("date" = UK_data$date, "location" = UK_data$areaName, "new_cases" = UK_data$newCasesBySpecimenDate)
Subset_UK <- Subset_UK[order(Subset_UK$date),]
UK<-Subset_UK %>% filter (date>= '2020-03-01',date<='2021-06-01')
summary(UK)
## date location new_cases
## Length:458 Length:458 Min. : 22
## Class :character Class :character 1st Qu.: 1466
## Mode :character Mode :character Median : 3753
## Mean : 9823
## 3rd Qu.:14344
## Max. :81519
ts_US <- ts(US$new_cases, frequency = 7)
ts_US
## Time Series:
## Start = c(1, 1)
## End = c(66, 3)
## Frequency = 7
## [1] 7 23 19 33 77 53 166 116 75 188
## [11] 365 439 633 759 234 1467 1833 2657 4494 6367
## [21] 5995 8873 11238 10619 12082 17856 18690 19630 18899 22075
## [31] 26314 32286 32222 32307 32386 29895 31390 30779 31235 35942
## [41] 34403 29083 27256 26939 28818 25408 30002 33066 27907 26062
## [51] 29820 25926 28911 33612 32327 30380 26488 23715 24575 26459
## [61] 29205 34907 27348 24321 24068 24528 24569 27429 26830 25176
## [71] 18805 19298 22927 20433 26817 24733 24135 18388 22389 20979
## [81] 22725 25769 23649 21099 20077 18662 19663 18551 22320 24478
## [91] 23632 18983 17414 21502 19857 21658 25403 21142 17928 17616
## [101] 18383 21111 23166 24828 25212 18941 19827 23667 27072 28537
## [111] 31544 32280 25150 32144 37072 35878 40326 45999 41330 40734
## [121] 41299 46420 51820 56636 51351 45693 50747 43106 60661 60119
## [131] 62509 68057 60017 58424 58930 68051 68090 75866 72236 62536
## [141] 60456 62107 64461 70579 68445 73319 64953 54816 56783 66412
## [151] 71873 67466 68712 56201 45568 45501 58774 54466 59349 59303
## [161] 54134 45769 47606 48003 56040 51316 65336 46951 39212 36652
## [171] 45033 47320 44041 48843 43064 34247 36482 40354 45172 45393
## [181] 46832 42754 34404 35362 41846 41018 44251 50382 43158 31218
## [191] 23613 27226 34049 36056 47769 41101 34372 34423 39492 39030
## [201] 45130 49282 42202 38447 51878 39866 39069 47103 48295 44673
## [211] 37528 33294 43338 39443 45650 54950 48577 35738 39376 45289
## [221] 51056 58599 56384 54953 45981 41774 52231 59778 64880 69147
## [231] 56759 49367 67717 61956 63301 76310 81933 82767 62204 67310
## [241] 76838 79426 91019 99264 89754 104900 85211 127198 104541 129367
## [251] 128033 127510 115170 120434 140504 146750 164750 180398 167926 136356
## [261] 162624 163958 173251 191510 198333 179436 146947 174089 175544 183403
## [271] 112526 207933 155661 140328 160321 188339 202894 223421 232644 215811
## [281] 181253 194409 224452 222648 231428 240089 217797 187971 194314 208992
## [291] 246644 239900 251832 192102 188015 198703 198107 229624 194155 97880
## [301] 226416 155845 174152 200408 233701 235600 153916 300462 208853 184005
## [311] 235042 255637 278337 295257 260967 213415 214664 226967 230301 235766
## [321] 242780 201858 177931 143598 176216 183261 193856 190760 170759 131198
## [331] 151677 147626 153961 168804 166613 142459 112152 134975 115303 121691
## [341] 124006 134422 104176 89746 90438 95265 95250 105764 99670 87219
## [351] 65135 54279 62498 70139 69911 79282 71696 57152 56159 72270
## [361] 74749 77504 77349 64626 51422 58098 57098 67191 68060 66419
## [371] 58254 41073 44917 57667 57895 62471 61513 53031 38278 56541
## [381] 54008 59280 60375 61651 55519 33822 51436 53688 86938 67546
## [391] 77377 62842 43223 69273 61429 66765 79119 69887 63261 35133
## [401] 77404 60674 75021 79894 82710 66687 46509 70075 77966 75403
## [411] 74308 80071 52532 42121 68105 60949 62855 67278 62411 53495
## [421] 32153 47568 50836 55150 58251 57919 45391 29403 50491 40723
## [431] 44704 47557 48130 33675 21427 36798 33662 35826 38076 42260
## [441] 28857 16875 28621 27789 29301 30206 27946 19798 12868 25815
## [451] 22739 23976 27448 21859 12001 6734 5775 22940
decompose_US <- decompose(ts_US, "additive")
plot(decompose_US)
ts_China <- ts(China$new_cases, frequency = 7)
decompose_China <- decompose(ts_China, "additive")
plot(decompose_China)
ts_India <- ts(India$new_cases, frequency = 7)
decompose_India <- decompose(ts_India, "additive")
plot(decompose_India)
ts_UK <- ts(UK$new_cases, frequency = 7)
decompose_UK <- decompose(ts_UK, "additive")
plot(decompose_UK)
ts_EU <- ts(EU$new_cases, frequency = 7)
decompose_EU <- decompose(ts_EU, "additive")
plot(decompose_EU)
#### 2f). EDA - Stationary check
acf(ts_US)
#ACF is decaying slowly and remains above the significance range (blue lines), which means it is non-stationary
adf.test(as.matrix(ts_US))
##
## Augmented Dickey-Fuller Test
##
## data: as.matrix(ts_US)
## Dickey-Fuller = -0.51593, Lag order = 7, p-value = 0.981
## alternative hypothesis: stationary
#Conducting Augmented Dickey-Fuller Test to check if the data is stationary
#p-value is greater than 0.05, which means it is non-stationary again.
acf(ts_China, lag.max = 34)
adf.test(as.matrix(ts_China))
## Warning in adf.test(as.matrix(ts_China)): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: as.matrix(ts_China)
## Dickey-Fuller = -4.8024, Lag order = 7, p-value = 0.01
## alternative hypothesis: stationary
#ts_China is stationary
acf(ts_India, lag.max = 34)
adf.test(as.matrix(ts_India))
## Warning in adf.test(as.matrix(ts_India)): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: as.matrix(ts_India)
## Dickey-Fuller = -4.5689, Lag order = 7, p-value = 0.01
## alternative hypothesis: stationary
#ts_India seems non-stationary from its ACF, but passed the ADF test
acf(ts_UK)
adf.test(as.matrix(ts_UK))
##
## Augmented Dickey-Fuller Test
##
## data: as.matrix(ts_UK)
## Dickey-Fuller = -1.3237, Lag order = 7, p-value = 0.8639
## alternative hypothesis: stationary
#ts_UK is non-stationary
acf(ts_EU)
adf.test(as.matrix(ts_EU))
##
## Augmented Dickey-Fuller Test
##
## data: as.matrix(ts_EU)
## Dickey-Fuller = -0.37683, Lag order = 7, p-value = 0.9873
## alternative hypothesis: stationary
#ts_EU is non-stationary
#finding the best ARIMA model with respect to AIC value
model_US <- auto.arima(ts_US, ic="aic", trace =TRUE, approximation = F)
##
## ARIMA(2,1,2)(1,0,1)[7] with drift : 10037.12
## ARIMA(0,1,0) with drift : 10273.22
## ARIMA(1,1,0)(1,0,0)[7] with drift : 10077.63
## ARIMA(0,1,1)(0,0,1)[7] with drift : 10058.91
## ARIMA(0,1,0) : 10271.23
## ARIMA(2,1,2)(0,0,1)[7] with drift : 10063.56
## ARIMA(2,1,2)(1,0,0)[7] with drift : Inf
## ARIMA(2,1,2)(2,0,1)[7] with drift : 10048.39
## ARIMA(2,1,2)(1,0,2)[7] with drift : Inf
## ARIMA(2,1,2) with drift : Inf
## ARIMA(2,1,2)(0,0,2)[7] with drift : 10059.9
## ARIMA(2,1,2)(2,0,0)[7] with drift : Inf
## ARIMA(2,1,2)(2,0,2)[7] with drift : Inf
## ARIMA(1,1,2)(1,0,1)[7] with drift : Inf
## ARIMA(2,1,1)(1,0,1)[7] with drift : 10035.15
## ARIMA(2,1,1)(0,0,1)[7] with drift : 10061.56
## ARIMA(2,1,1)(1,0,0)[7] with drift : 10046.47
## ARIMA(2,1,1)(2,0,1)[7] with drift : 10048.29
## ARIMA(2,1,1)(1,0,2)[7] with drift : Inf
## ARIMA(2,1,1) with drift : 10167.96
## ARIMA(2,1,1)(0,0,2)[7] with drift : 10057.93
## ARIMA(2,1,1)(2,0,0)[7] with drift : 10047.81
## ARIMA(2,1,1)(2,0,2)[7] with drift : Inf
## ARIMA(1,1,1)(1,0,1)[7] with drift : 10036.39
## ARIMA(2,1,0)(1,0,1)[7] with drift : 10037.41
## ARIMA(3,1,1)(1,0,1)[7] with drift : 10037.13
## ARIMA(1,1,0)(1,0,1)[7] with drift : 10078.3
## ARIMA(3,1,0)(1,0,1)[7] with drift : 10035.81
## ARIMA(3,1,2)(1,0,1)[7] with drift : Inf
## ARIMA(2,1,1)(1,0,1)[7] : 10033.15
## ARIMA(2,1,1)(0,0,1)[7] : 10059.56
## ARIMA(2,1,1)(1,0,0)[7] : 10044.47
## ARIMA(2,1,1)(2,0,1)[7] : 10046.29
## ARIMA(2,1,1)(1,0,2)[7] : Inf
## ARIMA(2,1,1) : 10165.97
## ARIMA(2,1,1)(0,0,2)[7] : 10055.94
## ARIMA(2,1,1)(2,0,0)[7] : Inf
## ARIMA(2,1,1)(2,0,2)[7] : Inf
## ARIMA(1,1,1)(1,0,1)[7] : 10034.39
## ARIMA(2,1,0)(1,0,1)[7] : 10035.41
## ARIMA(3,1,1)(1,0,1)[7] : 10035.13
## ARIMA(2,1,2)(1,0,1)[7] : 10035.12
## ARIMA(1,1,0)(1,0,1)[7] : 10076.3
## ARIMA(1,1,2)(1,0,1)[7] : Inf
## ARIMA(3,1,0)(1,0,1)[7] : 10033.81
## ARIMA(3,1,2)(1,0,1)[7] : Inf
##
## Best model: ARIMA(2,1,1)(1,0,1)[7]
model_US
## Series: ts_US
## ARIMA(2,1,1)(1,0,1)[7]
##
## Coefficients:
## ar1 ar2 ma1 sar1 sma1
## -0.4015 -0.1649 -0.3093 0.9218 -0.6703
## s.e. 0.1357 0.0888 0.1334 0.0376 0.0894
##
## sigma^2 estimated as 195556347: log likelihood=-5010.57
## AIC=10033.15 AICc=10033.34 BIC=10057.9
#checking stationary after seasonal differencing
acf(ts(model_US$residuals))
pacf(ts(model_US$residuals))
#ACF shows exponential decays and indicates it is stationary
#forecasting 1 month ahead with a 95% interval
forecast_US<-forecast(model_US, h=4, level=c(95))
forecast_US
## Point Forecast Lo 95 Hi 95
## 66.42857 15676.158 -11732.27 43084.58
## 66.57143 18057.317 -10474.36 46588.99
## 66.71429 18069.088 -12592.22 48730.39
## 66.85714 8893.088 -24455.61 42241.79
plot(forecast_US)
summary(forecast_US)
##
## Forecast method: ARIMA(2,1,1)(1,0,1)[7]
##
## Model Information:
## Series: ts_US
## ARIMA(2,1,1)(1,0,1)[7]
##
## Coefficients:
## ar1 ar2 ma1 sar1 sma1
## -0.4015 -0.1649 -0.3093 0.9218 -0.6703
## s.e. 0.1357 0.0888 0.1334 0.0376 0.0894
##
## sigma^2 estimated as 195556347: log likelihood=-5010.57
## AIC=10033.15 AICc=10033.34 BIC=10057.9
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -66.87734 13892.25 7090.738 0.5004233 11.76567 0.5716284
## ACF1
## Training set -0.001105334
##
## Forecasts:
## Point Forecast Lo 95 Hi 95
## 66.42857 15676.158 -11732.27 43084.58
## 66.57143 18057.317 -10474.36 46588.99
## 66.71429 18069.088 -12592.22 48730.39
## 66.85714 8893.088 -24455.61 42241.79
accuracy(forecast_US)
## ME RMSE MAE MPE MAPE MASE
## Training set -66.87734 13892.25 7090.738 0.5004233 11.76567 0.5716284
## ACF1
## Training set -0.001105334
#actual new cases in US on July 1, 2021
(dataset %>% filter (location =='United States', date=='2021-07-01'))[6]
#14463
#difference between the actual and predicted
abs(14463 - 8893.088)
## [1] 5569.912
#finding the best ARIMA model with respect to AIC value
model_China <- auto.arima(ts_China, ic="aic", trace =TRUE, approximation = F)
##
## ARIMA(2,1,2)(1,0,1)[7] with drift : 4367.799
## ARIMA(0,1,0) with drift : 4439.068
## ARIMA(1,1,0)(1,0,0)[7] with drift : 4404.438
## ARIMA(0,1,1)(0,0,1)[7] with drift : 4388.941
## ARIMA(0,1,0) : 4437.759
## ARIMA(2,1,2)(0,0,1)[7] with drift : 4366.334
## ARIMA(2,1,2) with drift : 4366.819
## ARIMA(2,1,2)(0,0,2)[7] with drift : 4367.357
## ARIMA(2,1,2)(1,0,0)[7] with drift : 4366.043
## ARIMA(2,1,2)(2,0,0)[7] with drift : 4367.607
## ARIMA(2,1,2)(2,0,1)[7] with drift : 4368.416
## ARIMA(1,1,2)(1,0,0)[7] with drift : 4372.232
## ARIMA(2,1,1)(1,0,0)[7] with drift : 4390.203
## ARIMA(3,1,2)(1,0,0)[7] with drift : 4367.07
## ARIMA(2,1,3)(1,0,0)[7] with drift : 4367.543
## ARIMA(1,1,1)(1,0,0)[7] with drift : 4390.926
## ARIMA(1,1,3)(1,0,0)[7] with drift : 4365.681
## ARIMA(1,1,3) with drift : 4366.188
## ARIMA(1,1,3)(2,0,0)[7] with drift : 4367.359
## ARIMA(1,1,3)(1,0,1)[7] with drift : 4367.502
## ARIMA(1,1,3)(0,0,1)[7] with drift : 4365.928
## ARIMA(1,1,3)(2,0,1)[7] with drift : Inf
## ARIMA(0,1,3)(1,0,0)[7] with drift : 4381.777
## ARIMA(1,1,4)(1,0,0)[7] with drift : 4367.255
## ARIMA(0,1,2)(1,0,0)[7] with drift : 4390.916
## ARIMA(0,1,4)(1,0,0)[7] with drift : 4369.33
## ARIMA(2,1,4)(1,0,0)[7] with drift : 4368.988
## ARIMA(1,1,3)(1,0,0)[7] : 4364.613
## ARIMA(1,1,3) : 4365.13
## ARIMA(1,1,3)(2,0,0)[7] : 4366.281
## ARIMA(1,1,3)(1,0,1)[7] : 4366.43
## ARIMA(1,1,3)(0,0,1)[7] : 4364.863
## ARIMA(1,1,3)(2,0,1)[7] : 4366.94
## ARIMA(0,1,3)(1,0,0)[7] : 4380.931
## ARIMA(1,1,2)(1,0,0)[7] : 4371.135
## ARIMA(2,1,3)(1,0,0)[7] : 4366.483
## ARIMA(1,1,4)(1,0,0)[7] : 4366.213
## ARIMA(0,1,2)(1,0,0)[7] : 4390.109
## ARIMA(0,1,4)(1,0,0)[7] : 4368.414
## ARIMA(2,1,2)(1,0,0)[7] : 4364.992
## ARIMA(2,1,4)(1,0,0)[7] : 4367.944
##
## Best model: ARIMA(1,1,3)(1,0,0)[7]
model_China
## Series: ts_China
## ARIMA(1,1,3)(1,0,0)[7]
##
## Coefficients:
## ar1 ma1 ma2 ma3 sar1
## 0.8121 -1.3398 0.3813 0.1832 -0.1093
## s.e. 0.0962 0.0982 0.1103 0.0605 0.0687
##
## sigma^2 estimated as 808.6: log likelihood=-2176.31
## AIC=4364.61 AICc=4364.8 BIC=4389.36
#checking stationary after seasonal differencing
acf(ts(model_China$residuals))
pacf(ts(model_China$residuals))
#ACF shows exponential decays and indicates it is stationary
#forecasting 1 month ahead with a 95% interval
forecast_China<-forecast(model_China, h=4, level=c(95))
forecast_China
## Point Forecast Lo 95 Hi 95
## 66.42857 22.69106 -33.04218 78.42429
## 66.57143 25.92905 -35.70845 87.56655
## 66.71429 27.30238 -38.73258 93.33734
## 66.85714 29.76334 -43.51591 103.04259
plot(forecast_China)
summary(forecast_China)
##
## Forecast method: ARIMA(1,1,3)(1,0,0)[7]
##
## Model Information:
## Series: ts_China
## ARIMA(1,1,3)(1,0,0)[7]
##
## Coefficients:
## ar1 ma1 ma2 ma3 sar1
## 0.8121 -1.3398 0.3813 0.1832 -0.1093
## s.e. 0.0962 0.0982 0.1103 0.0605 0.0687
##
## sigma^2 estimated as 808.6: log likelihood=-2176.31
## AIC=4364.61 AICc=4364.8 BIC=4389.36
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set -1.044764 28.24897 11.63521 -Inf Inf 0.658569 0.1718338
##
## Forecasts:
## Point Forecast Lo 95 Hi 95
## 66.42857 22.69106 -33.04218 78.42429
## 66.57143 25.92905 -35.70845 87.56655
## 66.71429 27.30238 -38.73258 93.33734
## 66.85714 29.76334 -43.51591 103.04259
accuracy(forecast_China)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set -1.044764 28.24897 11.63521 -Inf Inf 0.658569 0.1718338
#actual new cases in China on July 1, 2021
(dataset %>% filter (location =='China', date=='2021-07-01'))[6]
#18
#difference between the actual and predicted
abs(18 - 29.76)
## [1] 11.76
#finding the best ARIMA model with respect to AIC value
model_India <- auto.arima(ts_India, ic="aic", trace =TRUE, approximation = FALSE)
##
## ARIMA(2,0,2)(1,1,1)[7] with drift : Inf
## ARIMA(0,0,0)(0,1,0)[7] with drift : 10456.23
## ARIMA(1,0,0)(1,1,0)[7] with drift : 9165.984
## ARIMA(0,0,1)(0,1,1)[7] with drift : 9868.198
## ARIMA(0,0,0)(0,1,0)[7] : 10458.52
## ARIMA(1,0,0)(0,1,0)[7] with drift : 9203.874
## ARIMA(1,0,0)(2,1,0)[7] with drift : Inf
## ARIMA(1,0,0)(1,1,1)[7] with drift : Inf
## ARIMA(1,0,0)(0,1,1)[7] with drift : Inf
## ARIMA(1,0,0)(2,1,1)[7] with drift : Inf
## ARIMA(0,0,0)(1,1,0)[7] with drift : 10030.14
## ARIMA(2,0,0)(1,1,0)[7] with drift : Inf
## ARIMA(1,0,1)(1,1,0)[7] with drift : Inf
## ARIMA(0,0,1)(1,1,0)[7] with drift : 9766.506
## ARIMA(2,0,1)(1,1,0)[7] with drift : Inf
## ARIMA(1,0,0)(1,1,0)[7] : 9164.187
## ARIMA(1,0,0)(0,1,0)[7] : 9201.967
## ARIMA(1,0,0)(2,1,0)[7] : Inf
## ARIMA(1,0,0)(1,1,1)[7] : Inf
## ARIMA(1,0,0)(0,1,1)[7] : Inf
## ARIMA(1,0,0)(2,1,1)[7] : Inf
## ARIMA(0,0,0)(1,1,0)[7] : 10028.51
## ARIMA(2,0,0)(1,1,0)[7] : Inf
## ARIMA(1,0,1)(1,1,0)[7] : Inf
## ARIMA(0,0,1)(1,1,0)[7] : 9764.541
## ARIMA(2,0,1)(1,1,0)[7] : Inf
##
## Best model: ARIMA(1,0,0)(1,1,0)[7]
model_India
## Series: ts_India
## ARIMA(1,0,0)(1,1,0)[7]
##
## Coefficients:
## ar1 sar1
## 0.9878 -0.2954
## s.e. 0.0082 0.0454
##
## sigma^2 estimated as 38430875: log likelihood=-4579.09
## AIC=9164.19 AICc=9164.24 BIC=9176.52
#checking stationary after seasonal differencing
acf(ts(model_India$residuals))
pacf(ts(model_India$residuals))
#ACF shows exponential decays and indicates it is stationary
#forecasting 1 month ahead with a 95% interval
forecast_India<-forecast(model_India, h=4, level=c(95))
forecast_India
## Point Forecast Lo 95 Hi 95
## 66.42857 138196.43 126046.10 150346.8
## 66.57143 116863.73 99785.19 133942.3
## 66.71429 108452.48 87662.49 129242.5
## 66.85714 98886.52 75025.27 122747.8
plot(forecast_India)
summary(forecast_India)
##
## Forecast method: ARIMA(1,0,0)(1,1,0)[7]
##
## Model Information:
## Series: ts_India
## ARIMA(1,0,0)(1,1,0)[7]
##
## Coefficients:
## ar1 sar1
## 0.9878 -0.2954
## s.e. 0.0082 0.0454
##
## sigma^2 estimated as 38430875: log likelihood=-4579.09
## AIC=9164.19 AICc=9164.24 BIC=9176.52
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set -157.6634 6138.052 3382.382 -Inf Inf 0.2670619 -0.1550257
##
## Forecasts:
## Point Forecast Lo 95 Hi 95
## 66.42857 138196.43 126046.10 150346.8
## 66.57143 116863.73 99785.19 133942.3
## 66.71429 108452.48 87662.49 129242.5
## 66.85714 98886.52 75025.27 122747.8
accuracy(forecast_India)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set -157.6634 6138.052 3382.382 -Inf Inf 0.2670619 -0.1550257
#actual new cases in India on July 1, 2021
(dataset %>% filter (location =='India', date=='2021-07-01'))[6]
#46617
#difference between the actual and predicted
abs(46617 -98886.52)
## [1] 52269.52
#finding the best ARIMA model with respect to AIC value
model_UK <- auto.arima(ts_UK, ic="aic", trace =TRUE, approximation = F)
##
## ARIMA(2,1,2)(1,0,1)[7] with drift : 8746.007
## ARIMA(0,1,0) with drift : 8940.362
## ARIMA(1,1,0)(1,0,0)[7] with drift : 8790.661
## ARIMA(0,1,1)(0,0,1)[7] with drift : 8793.067
## ARIMA(0,1,0) : 8938.365
## ARIMA(2,1,2)(0,0,1)[7] with drift : 8751.76
## ARIMA(2,1,2)(1,0,0)[7] with drift : 8746.171
## ARIMA(2,1,2)(2,0,1)[7] with drift : Inf
## ARIMA(2,1,2)(1,0,2)[7] with drift : 8738.516
## ARIMA(2,1,2)(0,0,2)[7] with drift : 8763.425
## ARIMA(2,1,2)(2,0,2)[7] with drift : 8739.364
## ARIMA(1,1,2)(1,0,2)[7] with drift : 8737.044
## ARIMA(1,1,2)(0,0,2)[7] with drift : 8761.517
## ARIMA(1,1,2)(1,0,1)[7] with drift : 8745.039
## ARIMA(1,1,2)(2,0,2)[7] with drift : 8738.015
## ARIMA(1,1,2)(0,0,1)[7] with drift : 8775.854
## ARIMA(1,1,2)(2,0,1)[7] with drift : Inf
## ARIMA(0,1,2)(1,0,2)[7] with drift : 8735.29
## ARIMA(0,1,2)(0,0,2)[7] with drift : 8759.553
## ARIMA(0,1,2)(1,0,1)[7] with drift : 8743.449
## ARIMA(0,1,2)(2,0,2)[7] with drift : 8736.319
## ARIMA(0,1,2)(0,0,1)[7] with drift : 8774.647
## ARIMA(0,1,2)(2,0,1)[7] with drift : Inf
## ARIMA(0,1,1)(1,0,2)[7] with drift : 8740.985
## ARIMA(0,1,3)(1,0,2)[7] with drift : 8736.885
## ARIMA(1,1,1)(1,0,2)[7] with drift : 8736.771
## ARIMA(1,1,3)(1,0,2)[7] with drift : Inf
## ARIMA(0,1,2)(1,0,2)[7] : 8733.297
## ARIMA(0,1,2)(0,0,2)[7] : 8757.563
## ARIMA(0,1,2)(1,0,1)[7] : 8741.457
## ARIMA(0,1,2)(2,0,2)[7] : 8734.324
## ARIMA(0,1,2)(0,0,1)[7] : 8772.657
## ARIMA(0,1,2)(2,0,1)[7] : Inf
## ARIMA(0,1,1)(1,0,2)[7] : 8738.99
## ARIMA(1,1,2)(1,0,2)[7] : 8735.05
## ARIMA(0,1,3)(1,0,2)[7] : 8734.891
## ARIMA(1,1,1)(1,0,2)[7] : 8734.778
## ARIMA(1,1,3)(1,0,2)[7] : Inf
##
## Best model: ARIMA(0,1,2)(1,0,2)[7]
model_UK
## Series: ts_UK
## ARIMA(0,1,2)(1,0,2)[7]
##
## Coefficients:
## ma1 ma2 sar1 sma1 sma2
## -0.4246 -0.1336 0.9075 -0.4345 -0.2515
## s.e. 0.0481 0.0465 0.0424 0.0678 0.0562
##
## sigma^2 estimated as 11405895: log likelihood=-4360.65
## AIC=8733.3 AICc=8733.48 BIC=8758.05
#checking stationary after seasonal differencing
acf(ts(model_UK$residuals))
pacf(ts(model_UK$residuals))
#ACF shows exponential decays and indicates it is stationary
#forecasting 1 month ahead with a 95% interval
forecast_UK<-forecast(model_UK, h=4, level=c(95))
forecast_UK
## Point Forecast Lo 95 Hi 95
## 66.42857 4857.418 -1761.893 11476.73
## 66.57143 4508.709 -3128.030 12145.45
## 66.71429 4333.070 -3844.400 12510.54
## 66.85714 3948.322 -4736.275 12632.92
plot(forecast_UK)
summary(forecast_UK)
##
## Forecast method: ARIMA(0,1,2)(1,0,2)[7]
##
## Model Information:
## Series: ts_UK
## ARIMA(0,1,2)(1,0,2)[7]
##
## Coefficients:
## ma1 ma2 sar1 sma1 sma2
## -0.4246 -0.1336 0.9075 -0.4345 -0.2515
## s.e. 0.0481 0.0465 0.0424 0.0678 0.0562
##
## sigma^2 estimated as 11405895: log likelihood=-4360.65
## AIC=8733.3 AICc=8733.48 BIC=8758.05
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 8.504075 3355.067 1231.412 1.607446 11.84683 0.4768903
## ACF1
## Training set -0.005361969
##
## Forecasts:
## Point Forecast Lo 95 Hi 95
## 66.42857 4857.418 -1761.893 11476.73
## 66.57143 4508.709 -3128.030 12145.45
## 66.71429 4333.070 -3844.400 12510.54
## 66.85714 3948.322 -4736.275 12632.92
accuracy(forecast_UK)
## ME RMSE MAE MPE MAPE MASE
## Training set 8.504075 3355.067 1231.412 1.607446 11.84683 0.4768903
## ACF1
## Training set -0.005361969
#actual new cases in UK on July 1, 2021 is 28071
#difference between the actual and predicted
abs(28071 - 3944.238)
## [1] 24126.76
#finding the best ARIMA model with respect to AIC value
model_EU <- auto.arima(ts_EU, ic="aic", trace =TRUE, approximation = F)
##
## ARIMA(2,0,2)(1,1,1)[7] with drift : 9002.91
## ARIMA(0,0,0)(0,1,0)[7] with drift : 9198.97
## ARIMA(1,0,0)(1,1,0)[7] with drift : 9153.132
## ARIMA(0,0,1)(0,1,1)[7] with drift : 9167.494
## ARIMA(0,0,0)(0,1,0)[7] : 9197.177
## ARIMA(2,0,2)(0,1,1)[7] with drift : 9007.764
## ARIMA(2,0,2)(1,1,0)[7] with drift : 9055.255
## ARIMA(2,0,2)(2,1,1)[7] with drift : 9002.596
## ARIMA(2,0,2)(2,1,0)[7] with drift : 8987.179
## ARIMA(1,0,2)(2,1,0)[7] with drift : 9000.174
## ARIMA(2,0,1)(2,1,0)[7] with drift : 9000.452
## ARIMA(3,0,2)(2,1,0)[7] with drift : 9004.215
## ARIMA(2,0,3)(2,1,0)[7] with drift : 9002.359
## ARIMA(1,0,1)(2,1,0)[7] with drift : 9001.996
## ARIMA(1,0,3)(2,1,0)[7] with drift : 9001.645
## ARIMA(3,0,1)(2,1,0)[7] with drift : 9002.164
## ARIMA(3,0,3)(2,1,0)[7] with drift : 8998.186
## ARIMA(2,0,2)(2,1,0)[7] : 8985.228
## ARIMA(2,0,2)(1,1,0)[7] : 9053.309
## ARIMA(2,0,2)(2,1,1)[7] : 9000.598
## ARIMA(2,0,2)(1,1,1)[7] : 9000.977
## ARIMA(1,0,2)(2,1,0)[7] : 8998.178
## ARIMA(2,0,1)(2,1,0)[7] : 8998.456
## ARIMA(3,0,2)(2,1,0)[7] : 9002.202
## ARIMA(2,0,3)(2,1,0)[7] : 9000.362
## ARIMA(1,0,1)(2,1,0)[7] : 8999.998
## ARIMA(1,0,3)(2,1,0)[7] : 8999.651
## ARIMA(3,0,1)(2,1,0)[7] : 9000.169
## ARIMA(3,0,3)(2,1,0)[7] : 8996.191
##
## Best model: ARIMA(2,0,2)(2,1,0)[7]
model_EU
## Series: ts_EU
## ARIMA(2,0,2)(2,1,0)[7]
##
## Coefficients:
## ar1 ar2 ma1 ma2 sar1 sar2
## 1.8627 -0.8763 -1.6835 0.7546 -0.5989 -0.4088
## s.e. 0.0733 0.0698 0.0591 0.0404 0.0447 0.0451
##
## sigma^2 estimated as 25618041: log likelihood=-4485.61
## AIC=8985.23 AICc=8985.48 BIC=9014.01
#checking stationary after seasonal differencing
acf(ts(model_EU$residuals))
pacf(ts(model_EU$residuals))
#ACF shows exponential decays and indicates it is stationary
#forecasting 1 month ahead with a 95% interval
forecast_EU<-forecast(model_EU, h=4, level=c(95))
forecast_EU
## Point Forecast Lo 95 Hi 95
## 66.42857 8306.906 -1613.30826 18227.12
## 66.57143 12722.398 2644.21012 22800.59
## 66.71429 10265.902 -29.35596 20561.16
## 66.85714 7650.100 -2912.08897 18212.29
plot(forecast_EU)
summary(forecast_EU)
##
## Forecast method: ARIMA(2,0,2)(2,1,0)[7]
##
## Model Information:
## Series: ts_EU
## ARIMA(2,0,2)(2,1,0)[7]
##
## Coefficients:
## ar1 ar2 ma1 ma2 sar1 sar2
## 1.8627 -0.8763 -1.6835 0.7546 -0.5989 -0.4088
## s.e. 0.0733 0.0698 0.0591 0.0404 0.0447 0.0451
##
## sigma^2 estimated as 25618041: log likelihood=-4485.61
## AIC=8985.23 AICc=8985.48 BIC=9014.01
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 57.60673 4989.077 2799.547 -2.88599 23.6379 0.7205025 0.006634856
##
## Forecasts:
## Point Forecast Lo 95 Hi 95
## 66.42857 8306.906 -1613.30826 18227.12
## 66.57143 12722.398 2644.21012 22800.59
## 66.71429 10265.902 -29.35596 20561.16
## 66.85714 7650.100 -2912.08897 18212.29
accuracy(forecast_EU)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 57.60673 4989.077 2799.547 -2.88599 23.6379 0.7205025 0.006634856
#actual new cases in EU on July 1, 2021 is 2337
(dataset %>% filter (location =='Belgium', date=='2021-07-01'))[6] +(dataset %>% filter (location =='Netherlands', date=='2021-07-01'))[6] +(dataset %>% filter (location =='Germany', date=='2021-07-01'))[6]
#difference between the actual and predicted
abs(2337 - 7650.1)
## [1] 5313.1
US2 <- US %>% filter (location =='United States', date>= '2021-01-01',date<='2021-06-01')
ggplot(data= US2, aes(date,new_cases)) + geom_line()
ts_US2 <- ts(US2$new_cases,frequency =7)
ts_US2
## Time Series:
## Start = c(1, 1)
## End = c(22, 5)
## Frequency = 7
## [1] 153916 300462 208853 184005 235042 255637 278337 295257 260967 213415
## [11] 214664 226967 230301 235766 242780 201858 177931 143598 176216 183261
## [21] 193856 190760 170759 131198 151677 147626 153961 168804 166613 142459
## [31] 112152 134975 115303 121691 124006 134422 104176 89746 90438 95265
## [41] 95250 105764 99670 87219 65135 54279 62498 70139 69911 79282
## [51] 71696 57152 56159 72270 74749 77504 77349 64626 51422 58098
## [61] 57098 67191 68060 66419 58254 41073 44917 57667 57895 62471
## [71] 61513 53031 38278 56541 54008 59280 60375 61651 55519 33822
## [81] 51436 53688 86938 67546 77377 62842 43223 69273 61429 66765
## [91] 79119 69887 63261 35133 77404 60674 75021 79894 82710 66687
## [101] 46509 70075 77966 75403 74308 80071 52532 42121 68105 60949
## [111] 62855 67278 62411 53495 32153 47568 50836 55150 58251 57919
## [121] 45391 29403 50491 40723 44704 47557 48130 33675 21427 36798
## [131] 33662 35826 38076 42260 28857 16875 28621 27789 29301 30206
## [141] 27946 19798 12868 25815 22739 23976 27448 21859 12001 6734
## [151] 5775 22940
decompose_US <- decompose(ts_US2, "additive")
plot(decompose_US)
acf(ts_US2)
pacf(ts_US2)
model_US2 <- auto.arima(ts_US2, ic="aic", trace =TRUE, approximation = F)
##
## ARIMA(2,1,2)(1,0,1)[7] with drift : Inf
## ARIMA(0,1,0) with drift : 3436.874
## ARIMA(1,1,0)(1,0,0)[7] with drift : 3378.737
## ARIMA(0,1,1)(0,0,1)[7] with drift : 3387.647
## ARIMA(0,1,0) : 3435.133
## ARIMA(1,1,0) with drift : 3429.823
## ARIMA(1,1,0)(2,0,0)[7] with drift : 3371.47
## ARIMA(1,1,0)(2,0,1)[7] with drift : 3373.47
## ARIMA(1,1,0)(1,0,1)[7] with drift : 3373.713
## ARIMA(0,1,0)(2,0,0)[7] with drift : 3411.079
## ARIMA(2,1,0)(2,0,0)[7] with drift : 3362.669
## ARIMA(2,1,0)(1,0,0)[7] with drift : 3366.716
## ARIMA(2,1,0)(2,0,1)[7] with drift : 3363.188
## ARIMA(2,1,0)(1,0,1)[7] with drift : Inf
## ARIMA(3,1,0)(2,0,0)[7] with drift : 3364.648
## ARIMA(2,1,1)(2,0,0)[7] with drift : 3364.656
## ARIMA(1,1,1)(2,0,0)[7] with drift : 3365.16
## ARIMA(3,1,1)(2,0,0)[7] with drift : 3366.179
## ARIMA(2,1,0)(2,0,0)[7] : 3360.707
## ARIMA(2,1,0)(1,0,0)[7] : 3364.741
## ARIMA(2,1,0)(2,0,1)[7] : 3361.256
## ARIMA(2,1,0)(1,0,1)[7] : Inf
## ARIMA(1,1,0)(2,0,0)[7] : 3369.485
## ARIMA(3,1,0)(2,0,0)[7] : 3362.683
## ARIMA(2,1,1)(2,0,0)[7] : 3362.692
## ARIMA(1,1,1)(2,0,0)[7] : 3363.236
## ARIMA(3,1,1)(2,0,0)[7] : 3364.184
##
## Best model: ARIMA(2,1,0)(2,0,0)[7]
model_US2
## Series: ts_US2
## ARIMA(2,1,0)(2,0,0)[7]
##
## Coefficients:
## ar1 ar2 sar1 sar2
## -0.9437 -0.4262 0.5047 0.3454
## s.e. 0.1223 0.1272 0.1266 0.1369
##
## sigma^2 estimated as 245647046: log likelihood=-1675.35
## AIC=3360.71 AICc=3361.12 BIC=3375.79
acf(ts(model_US2$residuals))
pacf(ts(model_US2$residuals))
forecast_US2<-forecast(model_US2, h=4, level=c(95))
forecast_US2
## Point Forecast Lo 95 Hi 95
## 22.71429 11074.415 -19644.36 41793.19
## 22.85714 17318.882 -13448.53 48086.29
## 23.00000 15319.137 -19357.15 49995.43
## 23.14286 4236.653 -33500.57 41973.87
plot(forecast_US2)
summary(forecast_US2)
##
## Forecast method: ARIMA(2,1,0)(2,0,0)[7]
##
## Model Information:
## Series: ts_US2
## ARIMA(2,1,0)(2,0,0)[7]
##
## Coefficients:
## ar1 ar2 sar1 sar2
## -0.9437 -0.4262 0.5047 0.3454
## s.e. 0.1223 0.1272 0.1266 0.1369
##
## sigma^2 estimated as 245647046: log likelihood=-1675.35
## AIC=3360.71 AICc=3361.12 BIC=3375.79
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set -886.8403 15413.19 8651.75 -2.113732 10.95465 0.5474379 0.1816099
##
## Forecasts:
## Point Forecast Lo 95 Hi 95
## 22.71429 11074.415 -19644.36 41793.19
## 22.85714 17318.882 -13448.53 48086.29
## 23.00000 15319.137 -19357.15 49995.43
## 23.14286 4236.653 -33500.57 41973.87
accuracy(forecast_US2)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set -886.8403 15413.19 8651.75 -2.113732 10.95465 0.5474379 0.1816099
#difference between the actual and predicted
abs(14463 - 4236.653)
## [1] 10226.35